# ==============================================================================
# tweets-sentiment-geo.R
# author: Bernhard Clemm
# ==============================================================================

# Project root = grandparent directory of the file currently open in the
# RStudio source editor. NOTE(review): this relies on rstudioapi, so the script
# presumably only runs from within an RStudio session (not via Rscript).
dir <- dirname(dirname(rstudioapi::getSourceEditorContext()$path))
# Run the shared setup script; judging by its name it loads the packages used
# below (must happen before the first use of %>%).
source(paste0(dir, "/code/setup-packages.R"))

# Get MPs by electoral district ================================================

## The scraping below relies on the layout of Wikipedia tables, which might change.
## The resulting table (`mps`) can also be imported from CSV further below.

# Scrape Wikipedia table 2021
# The member list is taken from the third <table> element of the page.
url <- "https://de.wikipedia.org/wiki/Liste_der_Mitglieder_des_Deutschen_Bundestages_(20._Wahlperiode)"
mps_2021_01 <- url %>%
  read_html() %>%
  html_nodes("table") %>%
  .[[3]] %>%
  html_table()
# Columns are renamed by position, so fail loudly if the layout has changed
# instead of silently attaching the wrong labels.
if (ncol(mps_2021_01) != 11) {
  stop(
    "Expected 11 columns in the 2021 Wikipedia table, found ",
    ncol(mps_2021_01), "; the page layout has probably changed.",
    call. = FALSE
  )
}
names(mps_2021_01) <- c(
  "bild", "name", "birth", "party",
  "bundesland", "list", "wahlkreis",
  "erststimme", "profession", "since", "notes"
)

# Cleaning
mps_2021_02 <- mps_2021_01 %>%
  select(name, party, wahlkreis, erststimme) %>%
  mutate(
    election_date = "2021-09-26",
    # strip "%" and the en dash, switch the decimal comma to a point
    # (the value stays character here)
    erststimme = trimws(gsub(",", ".", gsub("%|–", "", erststimme))),
    # a lone en dash in the Wahlkreis column is recoded to missing
    wahlkreis = ifelse(wahlkreis == "–", NA_character_, wahlkreis)
  ) %>%
  # split at spaces into two pieces; for names with more than two words
  # last_name ends up holding the second word and is corrected below
  separate(name,
    sep = " ", into = c("first_name", "last_name"),
    remove = FALSE
  ) %>%
  # correct last names where several words
  mutate(last_name = case_when(
    name == "Tobias B. Bacherle" ~ "Bacherle",
    name == "Olaf in der Beek" ~ "in der Beek",
    name == "Matthias W. Birkwald" ~ "Birkwald",
    name == "Alexander Graf Lambsdorff" ~ "Graf Lambsdorff",
    name == "Michael Georg Link" ~ "Link",
    name == "Erik von Malottki" ~ "von Malottki",
    name == "Takis Mehmet Ali" ~ "Ali",
    name == "Amira Mohamed Ali" ~ "Ali",
    name == "Konstantin von Notz" ~ "von Notz",
    name == "Tobias Matthias Peterka" ~ "Peterka",
    name == "Kassem Taher Saleh" ~ "Saleh",
    name == "Catarina dos Santos Firnhaber" ~ "Firnhaber",
    name == "Christian von Stetten" ~ "von Stetten",
    name == "Beatrix von Storch" ~ "von Storch",
    name == "Christoph de Vries" ~ "de Vries",
    TRUE ~ last_name
  ))

# Scrape Wikipedia table 2017
# The member list is taken from the fourth <table> element of the page.
url <- "https://de.wikipedia.org/wiki/Liste_der_Mitglieder_des_Deutschen_Bundestages_(19._Wahlperiode)"
mps_2017_01 <- url %>%
  read_html() %>%
  html_nodes("table") %>%
  .[[4]] %>%
  html_table()
# Columns are renamed by position, so fail loudly if the layout has changed
# instead of silently attaching the wrong labels.
if (ncol(mps_2017_01) != 9) {
  stop(
    "Expected 9 columns in the 2017 Wikipedia table, found ",
    ncol(mps_2017_01), "; the page layout has probably changed.",
    call. = FALSE
  )
}
names(mps_2017_01) <- c(
  "bild", "name", "birth", "party",
  "bundesland", "wahlkreis", "erststimme", "notes",
  "profession"
)

# Cleaning
mps_2017_02 <- mps_2017_01 %>%
  select(name, party, wahlkreis, erststimme) %>%
  mutate(
    election_date = "2017-09-24",
    # strip "%", hyphens and en dashes, switch the decimal comma to a point
    # (the value stays character here)
    erststimme = trimws(gsub(",", ".", gsub("%|-|–", "", erststimme)))
  ) %>%
  # split at spaces into two pieces; for names with more than two words
  # last_name ends up holding the second word and is corrected below
  separate(name,
    sep = " ", into = c("first_name", "last_name"),
    remove = FALSE
  ) %>%
  # correct last names where several words
  mutate(last_name = case_when(
    name == "Michael von Abercron" ~ "von Abercron",
    name == "Olaf in der Beek" ~ "in der Beek",
    name == "Daniela De Ridder" ~ "De Ridder",
    name == "Lorenz Gösta Beutin" ~ "Beutin",
    name == "Matthias W. Birkwald" ~ "Birkwald",
    name == "Berengar Elsner von Gronow" ~ "Elsner von Gronow",
    name == "Wilhelm von Gottberg" ~ "von Gottberg",
    name == "Ottmar von Holtz" ~ "von Holtz",
    name == "Alexander Graf Lambsdorff" ~ "Graf Lambsdorff",
    name == "Karl A. Lamers" ~ "Lamers",
    name == "Michael Georg Link" ~ "Link",
    name == "Thomas de Maizière" ~ "de Maizière",
    name == "Matern von Marschall" ~ "von Marschall",
    name == "Hans-Georg von der Marwitz" ~ "von der Marwitz",
    name == "Fabio De Masi" ~ "De Masi",
    name == "Amira Mohamed Ali" ~ "Ali",
    name == "Tobias Matthias Peterka" ~ "Peterka",
    name == "Konstantin von Notz" ~ "von Notz",
    name == "Paul Viktor Podolay" ~ "Podolay",
    name == "Ernst Dieter Rossmann" ~ "Rossmann",
    name == "Hermann Otto Solms" ~ "Solms",
    name == "Helin Evrim Sommer" ~ "Sommer",
    name == "Christian von Stetten" ~ "von Stetten",
    name == "Beatrix von Storch" ~ "von Storch",
    name == "Christoph de Vries" ~ "de Vries",
    name == "Kees de Vries" ~ "de Vries",
    TRUE ~ last_name
  ))

# Scrape Wikipedia table 2013
# The member list is taken from the second <table> element of the page.
url <- "https://de.wikipedia.org/wiki/Liste_der_Mitglieder_des_Deutschen_Bundestages_(18._Wahlperiode)"
mps_2013_01 <- url %>%
  read_html() %>%
  html_nodes("table") %>%
  .[[2]] %>%
  html_table()
# Columns are renamed by position, so fail loudly if the layout has changed
# instead of silently attaching the wrong labels.
if (ncol(mps_2013_01) != 7) {
  stop(
    "Expected 7 columns in the 2013 Wikipedia table, found ",
    ncol(mps_2013_01), "; the page layout has probably changed.",
    call. = FALSE
  )
}
names(mps_2013_01) <- c(
  "name", "birth", "party",
  "bundesland", "wahlkreis", "erststimme", "notes"
)

# Cleaning
mps_2013_02 <- mps_2013_01 %>%
  select(name, party, wahlkreis, erststimme) %>%
  mutate(
    election_date = "2013-09-22",
    # strip "%", hyphens and en dashes, switch the decimal comma to a point
    # (the value stays character here)
    erststimme = trimws(gsub(",", ".", gsub("%|-|–", "", erststimme)))
  ) %>%
  # split at spaces into two pieces; for names with more than two words
  # last_name ends up holding the second word and is corrected below
  separate(name,
    sep = " ", into = c("first_name", "last_name"),
    remove = FALSE
  ) %>%
  # correct last names where several words
  mutate(last_name = case_when(
    name == "Jan van Aken" ~ "van Aken",
    name == "Cajus Julius Caesar" ~ "Caesar",
    name == "Mathias Edwin Höschel" ~ "Höschel",
    name == "Charles M. Huber" ~ "Huber",
    name == "Karl A. Lamers" ~ "Lamers",
    name == "Ernst Dieter Rossmann" ~ "Rossmann",
    name == "Philipp Graf von und zu Lerchenfeld" ~ "Graf von und zu Lerchenfeld",
    name == "Ursula von der Leyen" ~ "von der Leyen",
    name == "Thomas de Maizière" ~ "de Maizière",
    name == "Hans-Georg von der Marwitz" ~ "von der Marwitz",
    name == "Konstantin von Notz" ~ "von Notz",
    name == "Christian von Stetten" ~ "von Stetten",
    name == "Kees de Vries" ~ "de Vries",
    TRUE ~ last_name
  ))

# Stack the three cleaned legislative periods (2013, 2017, 2021) into one
# table, oldest first.
# NOTE: the next section re-reads `mps` from data/twitter/crosswalks/mps.csv,
# which replaces the table built here.
mps <- do.call(rbind, list(mps_2013_02, mps_2017_02, mps_2021_02))

# Drop the raw and cleaned per-period intermediates.
rm(list = c(
  "mps_2013_01", "mps_2017_01", "mps_2021_01",
  "mps_2013_02", "mps_2017_02", "mps_2021_02"
))

# Matching MPs to Twitter users ================================================

# Import the MP table from disk.
# NOTE: this replaces any `mps` object built by the scraping section above.
mps <- read.csv(paste0(dir, "/data/twitter/crosswalks/mps.csv"))

# One row per (party, first name, last name, screen name) found in the tweets.
mps_twitter <- read.csv(
  paste0(dir, "/data/twitter/tweets_wolf_sentis.csv")
) %>%
  distinct(party, first_name, last_name, screen_name) %>%
  # keep only the first word of the first name, then de-duplicate again
  mutate(first_name = word(first_name, 1)) %>%
  distinct() %>%
  # correct some first names for matching
  mutate(first_name = case_when(
    last_name == "Frohnmaier" ~ "Markus",
    last_name == "Wundrak" ~ "Joachim",
    last_name == "Lucassen" ~ "Rüdiger",
    TRUE ~ as.character(first_name)
  ))

# Attach the Twitter handle to every MP row by first and last name. The party
# column of the tweet data is dropped, so `party` comes from `mps`.
mps_02 <- mps %>%
  select(-name) %>%
  left_join(
    select(mps_twitter, -party),
    by = c("first_name", "last_name")
  ) %>%
  # n = number of rows per MP/party after the join
  group_by(first_name, last_name, party) %>%
  mutate(n = n()) %>%
  # ungroup so later steps do not silently run group-wise
  ungroup()

# Summarize wolf tweets by MP/election period ==================================

tweets_wolf <- read.csv(paste0(dir, "/data/twitter/tweets_wolf_sentis.csv"))

# Tweet count and sentiment mean/sd per party, account and election period.
tweets_wolf_summ <- tweets_wolf %>%
  # Label every tweet with the first election date on or after it.
  # case_when() evaluates the conditions in order, so lower bounds are implied.
  # Dates are compared as strings -- assumes "YYYY-MM-DD" values; TODO confirm.
  mutate(election_date = case_when(
    tweet_created_at_date <= "2013-09-22" ~ "2013-09-22",
    tweet_created_at_date <= "2017-09-24" ~ "2017-09-24",
    tweet_created_at_date <= "2021-09-26" ~ "2021-09-26",
    # tweets after the 2021 election (or with a missing date) get NA
    TRUE ~ NA_character_
  )) %>%
  group_by(party, screen_name, election_date) %>%
  summarize(
    n_tweets = n(),
    senti_mean = mean(sentiws_stand, na.rm = TRUE),
    senti_sd = sd(sentiws_stand, na.rm = TRUE)
  ) %>%
  ungroup()

# Attach the tweet summaries to the MP table by account and election period.
mps_03 <- mps_02 %>%
  left_join(
    select(tweets_wolf_summ, -party),
    by = c("screen_name", "election_date")
  )

mps_04 <- mps_03 %>%
  mutate(
    # "1" if a tweet summary was matched for this MP and period, else "0"
    tweeted = as.factor(ifelse(!is.na(n_tweets), "1", "0")),
    # remove stray "§" / "&" characters before converting to numeric
    erststimme = as.numeric(stringr::str_trim(gsub("§|&", "", erststimme))),
    afd = ifelse(grepl("AfD", party), "AfD", "Other")
  )

# Additional step: fill in 2017 Wahlkreise =====================================

## Assuming that for 20 AfD MPs who tweeted between 2017 and 2021,
## Wahlkreis_2017 equals Wahlkreis_2021

# Screen names of AfD accounts with tweets labelled for the 2021 election
afd_tweeters <- tweets_wolf_summ %>%
  filter(party == "AfD", election_date == "2021-09-26") %>%
  pull(screen_name)

# Crosswalk: those accounts' 2021 Wahlkreis, keyed to the 2017 election date
# so that it can be joined onto the 2017 rows
afd_tweeters_cw <- mps_04 %>%
  ungroup() %>%
  filter(screen_name %in% afd_tweeters, election_date == "2021-09-26") %>%
  select(screen_name, wahlkreis_estimated = wahlkreis) %>%
  mutate(election_date = "2017-09-24")

# Use the estimated Wahlkreis where the crosswalk supplies one, otherwise
# fall back to the recorded Wahlkreis
mps_05 <- mps_04 %>%
  left_join(afd_tweeters_cw, by = c("screen_name", "election_date")) %>%
  mutate(wahlkreis_estimated = coalesce(wahlkreis_estimated, wahlkreis))

# Matching electoral district to AGS ===========================================

# Crosswalk inputs; only the 2nd and 3rd column of each file are kept
ags <- read_csv(paste0(dir, "/data/twitter/crosswalks/ags.csv"))[, 2:3]
wkr <- read_csv(paste0(dir, "/data/twitter/crosswalks/wkr.csv"))[, 2:3]
intersect <- read_csv(paste0(dir, "/data/twitter/crosswalks/intersection.csv"))[, 2:3]

# Wahlkreis -> intersection table -> AGS
crosswalk <- wkr %>%
  left_join(intersect, by = c("wkr_number" = "row.id")) %>%
  left_join(ags, by = c("col.id" = "rowname"))

# Harmonise Wahlkreis names with the crosswalk spelling before joining.
# NOTE: DONE ONLY FOR RELEVANT AFD MPs
mps_06 <- mps_05 %>%
  mutate(
    # " – " separators become a single space
    wahlkreis_estimated = gsub(" – ", " ", wahlkreis_estimated),
    # this one name is hyphenated in the crosswalk
    wahlkreis_estimated = replace(
      wahlkreis_estimated,
      wahlkreis_estimated %in% "Sächsische Schweiz Osterzgebirge",
      "Sächsische Schweiz-Osterzgebirge"
    )
  ) %>%
  left_join(crosswalk, by = c("wahlkreis_estimated" = "WKR_NAME"))

# Matching to wolf attacks =====================================================

btw <- read.csv(paste0(dir, "/data/votes/btw_merge.csv"))

# AfD rows of the 2021 election, with 7-character AGS codes left-padded by "0"
wolf_afd_2021 <- btw %>%
  filter(date_election == "2021-09-26", party == "afd") %>%
  mutate(ags = ifelse(nchar(ags) == 7, paste0("0", ags), ags)) %>%
  select(ags, percent, attacks_this_election_period, any_attacks_this_period)

# AfD MPs of the 2021 period joined to the attack data via AGS
mps_afd_2021 <- mps_06 %>%
  ungroup() %>%
  filter(party == "AfD", election_date == "2021-09-26") %>%
  select(
    first_name, last_name, screen_name, n_tweets, senti_mean, senti_sd,
    wahlkreis, wahlkreis_estimated, AGS, election_date
  ) %>%
  left_join(wolf_afd_2021, by = c("AGS" = "ags")) %>%
  # MPs with an account but no matched tweets count as 0 tweets; everything
  # else keeps n_tweets (NA stays NA for MPs without an account)
  mutate(n_tweets_rec = ifelse(
    is.na(screen_name) | !is.na(n_tweets), n_tweets, 0
  ))

# Analysis: Do attacks predict tweet sentiment? ==========================================

# Collapse to one row per MP by summing attacks over all joined AGS rows.
# NOTE(review): MPs whose rows have no matched attack data (NA after the join)
# get a sum of 0 through na.rm = TRUE and are therefore classified as
# "no attacks" rather than missing -- confirm this is intended.
mps_afd_2021_summ <- mps_afd_2021 %>%
  group_by(
    first_name, last_name, screen_name,
    n_tweets_rec, senti_mean, senti_sd,
    wahlkreis, wahlkreis_estimated
  ) %>%
  summarise(
    attacks_this_election_period = sum(
      attacks_this_election_period,
      na.rm = TRUE
    )
  ) %>%
  # drop the remaining grouping so the result is a plain table
  ungroup() %>%
  mutate(
    any_attacks_this_period = ifelse(attacks_this_election_period > 0, 1, 0),
    any_tweets = ifelse(n_tweets_rec > 0, 1, 0)
  )

# Mean sentiment by attack status, followed by a two-sample t-test
mps_afd_2021_summ %>%
  group_by(any_attacks_this_period) %>%
  summarise(senti = mean(senti_mean, na.rm = TRUE))
t.test(senti_mean ~ any_attacks_this_period, data = mps_afd_2021_summ)
